1 Introduction

The objective of this notebook is to correct the cold-shock transcriptional signature from the CLL scRNA-seq dataset to correct for the technical artifact introduced by sampling time while presarving the biological variation. We will follow a similar approach to the one we used for healthy PBMC.

2 Pre-processing

2.1 Package loading

library(scater)
library(scran)
library(Seurat)
library(ggpubr)
library(biomaRt)
library(org.Hs.eg.db)
library(AnnotationDbi)
library(GOplot)
library(GOstats)
library(topGO)
library(ggrepel)
library(viridis)
library(pheatmap)
library(tidyverse)

2.2 Source script with functions

source("bin/utils.R")

2.3 Load data

cll_rt_l <- readRDS("results/R_objects/cll_rt_seurat_list.rds")
dea_list <- readRDS("results/R_objects/dea_results_per_patient.rds")

2.4 Merge Seurat objects

cll_merged <- merge(x = cll_rt_l$`1220`, y = cll_rt_l$`1472`, add.cell.ids = c("1220", "1472"))
cll_merged$time <- factor(cll_merged$time, levels = c("0h", "2h", "4h", "6h", "8h", "24h"))

3 Calculate cold-shock signature

Idents(cll_merged) <- "time"
cll_merged <- pre_process_seurat(cll_merged)
original_umap <- DimPlot(cll_merged, reduction = "umap", cols = viridis(6), pt.size = 0.75)
original_umap

# ggsave(
#   filename = "results/plots/uncorrected_umap.pdf", 
#   plot = original_umap, 
#   width = 10, 
#   height = 9
# )

# Find cold shock signature
cold_shock_signature <- dea_list$`1892`$gene[dea_list$`1892`$avg_logFC > 0][1:200]

# Compute cold shock score
cll_merged <- AddModuleScore(cll_merged, features = list(cold_shock_signature), name = "cold_shock_score")
umap_cold_shock_score <- FeaturePlot(cll_merged, features = "cold_shock_score1", cols = viridis(20), pt.size = 0.75)
umap_cold_shock_score

# ggsave(
#   filename = "results/plots/umap_cold_shock_score.pdf", 
#   plot = umap_cold_shock_score, 
#   width = 10, 
#   height = 9
# )
violin_cold_shock_score <- VlnPlot(
  cll_merged, 
  features = "cold_shock_score1", 
  pt.size = 0, 
  group.by = "time",
  cols = viridis(6)
)
violin_cold_shock_score

# ggsave(
#   filename = "results/plots/violin_cold_shock_score.pdf", 
#   plot = violin_cold_shock_score, 
#   width = 10, 
#   height = 7
# )

4 Regress out signature

lmp <- function (modelobject) {
  if (class(modelobject) != "lm") stop("Not an object of class 'lm' ")
  f <- summary(modelobject)$fstatistic
  p <- pf(f[1], f[2], f[3], lower.tail = FALSE)
  attributes(p) <- NULL
  return(p)
}

# Regress the expression of each gene on cold shock-score
cll_merged <- FindVariableFeatures(cll_merged)
mat <- as.matrix(cll_merged[["RNA"]]@data[VariableFeatures(cll_merged), ])
lm_list <- apply(mat, 1, function(x) lm(x ~ cll_merged$cold_shock_score1))
names(lm_list) <- VariableFeatures(cll_merged)

# Distribution of p-values
p_values_list <- map_dbl(lm_list, lmp)
p_values_df <- data.frame(p_value = p_values_list)
ggplot(p_values_df, aes(p_value)) +
  geom_histogram(bins = 100) 

# Scatter plots of key genes
genes <- c("IGLC2", "EIF1", "CIRBP", "IGLC3")
scatter_plots <- purrr::map(genes, function(gene) {
  df <- data.frame(cold_shock_score = cll_merged$cold_shock_score1, expr = cll_merged[["RNA"]]@data[gene,], cluster = cll_merged$donor)
  ggplot(df, aes(cold_shock_score, expr, color = cluster)) +
    geom_point(alpha = 0.8) +
    geom_smooth(method = "lm") +
    labs(title = gene, x = "Cold Shock Score", y = "Gene Expression") +
    theme_classic()
})
scatter_plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# Keep residuals as non-explained variability
length(lm_list)
## [1] 2000
residuals_mat <- bind_rows(purrr::map(lm_list, "residuals"), .id = "gene")
residuals_mat <- t(as.matrix(residuals_mat[, 2:ncol(residuals_mat)]))
colnames(residuals_mat) <- colnames(cll_merged)
rownames(residuals_mat) <- names(lm_list)
residuals_mat_sc <- scale(residuals_mat, center = TRUE, scale = TRUE)
residuals_mat_sc <- residuals_mat_sc[rownames(cll_merged[["RNA"]]@scale.data), ]

# Include new matrix in scale.data slot and pre-process
cll_merged2 <- cll_merged
cll_merged2[["RNA"]]@scale.data <- residuals_mat_sc
cll_merged2 <- RunPCA(cll_merged2)
cll_merged2 <- RunUMAP(cll_merged2, dims = 1:20)

# Visualize correction
Idents(cll_merged2) <- "time"
palette <- c("#999999", "#92e8df", "yellow2", "limegreen", "#632c63", "#e4624e")
regressed <- DimPlot(cll_merged2, reduction = "umap", cols = palette)
original <- DimPlot(cll_merged, reduction = "umap", cols = palette)
ggarrange(plotlist = list(original, regressed), ncol = 2)

# saveRDS(list(original = original, regressed = regressed), file = "results/R_objects/ggplots/umaps_original_corrected.rds")

5 Session Information

sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.0               stringr_1.4.0               dplyr_0.8.4                 purrr_0.3.3                 readr_1.3.1                 tidyr_1.0.2                 tibble_2.1.3                tidyverse_1.3.0             pheatmap_1.0.12             viridis_0.5.1               viridisLite_0.3.0           ggrepel_0.8.1               topGO_2.38.1                SparseM_1.78                GO.db_3.10.0                GOstats_2.52.0              graph_1.64.0                Category_2.52.1             Matrix_1.2-18               GOplot_1.0.2                RColorBrewer_1.1-2          gridExtra_2.3               ggdendro_0.1-20             org.Hs.eg.db_3.10.0         AnnotationDbi_1.48.0        biomaRt_2.42.0              ggpubr_0.2.5                magrittr_1.5                Seurat_3.1.4                scran_1.14.6                scater_1.14.6               ggplot2_3.3.0               SingleCellExperiment_1.8.0  SummarizedExperiment_1.16.1 DelayedArray_0.12.2         BiocParallel_1.20.1         matrixStats_0.55.0          Biobase_2.46.0              GenomicRanges_1.38.0        GenomeInfoDb_1.22.0         IRanges_2.20.2              S4Vectors_0.24.3           
## [43] BiocGenerics_0.32.0         BiocStyle_2.14.4           
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.1           AnnotationForge_1.28.0   bit64_0.9-7              knitr_1.28               irlba_2.3.3              multcomp_1.4-12          data.table_1.12.8        RCurl_1.98-1.1           generics_0.0.2           metap_1.3                cowplot_1.0.0            TH.data_1.0-10           RSQLite_2.2.0            RANN_2.6.1               future_1.16.0            bit_1.1-15.2             mutoss_0.1-12            xml2_1.2.2               lubridate_1.7.4          assertthat_0.2.1         xfun_0.12                hms_0.5.3                evaluate_0.14            fansi_0.4.1              progress_1.2.2           caTools_1.18.0           dbplyr_1.4.2             readxl_1.3.1             Rgraphviz_2.30.0         igraph_1.2.4.2           DBI_1.1.0                htmlwidgets_1.5.1        RSpectra_0.16-0          backports_1.1.5          bookdown_0.18            annotate_1.64.0          gbRd_0.4-11              RcppParallel_4.4.4       vctrs_0.2.3              ROCR_1.0-7               withr_2.1.2              sctransform_0.2.1        prettyunits_1.1.1        mnormt_1.5-6             cluster_2.1.0            ape_5.3                  lazyeval_0.2.2          
##  [48] crayon_1.3.4             genefilter_1.68.0        edgeR_3.28.1             pkgconfig_2.0.3          labeling_0.3             nlme_3.1-145             vipor_0.4.5              rlang_0.4.5              globals_0.12.5           lifecycle_0.1.0          sandwich_2.5-1           BiocFileCache_1.10.2     modelr_0.1.6             rsvd_1.0.3               cellranger_1.1.0         lmtest_0.9-37            zoo_1.8-7                reprex_0.3.0             beeswarm_0.2.3           ggridges_0.5.2           png_0.1-7                bitops_1.0-6             KernSmooth_2.23-16       blob_1.2.1               DelayedMatrixStats_1.8.0 ggsignif_0.6.0           scales_1.1.0             memoise_1.1.0            GSEABase_1.48.0          plyr_1.8.6               ica_1.0-2                gplots_3.0.3             bibtex_0.4.2.2           gdata_2.18.0             zlibbioc_1.32.0          compiler_3.6.1           lsei_1.2-0               dqrng_0.2.1              plotrix_3.7-7            fitdistrplus_1.0-14      cli_2.0.2                XVector_0.26.0           listenv_0.8.0            patchwork_1.0.0          pbapply_1.4-2            mgcv_1.8-31              MASS_7.3-51.5           
##  [95] tidyselect_1.0.0         stringi_1.4.6            yaml_2.2.1               BiocSingular_1.2.2       askpass_1.1              locfit_1.5-9.1           grid_3.6.1               tools_3.6.1              future.apply_1.4.0       rstudioapi_0.11          farver_2.0.3             Rtsne_0.15               digest_0.6.25            BiocManager_1.30.10      Rcpp_1.0.3               broom_0.5.5              RcppAnnoy_0.0.15         httr_1.4.1               npsurv_0.4-0             Rdpack_0.11-1            colorspace_1.4-1         rvest_0.3.5              XML_3.99-0.3             fs_1.3.2                 reticulate_1.14          splines_3.6.1            uwot_0.1.5               RBGL_1.62.1              statmod_1.4.34           sn_1.5-5                 multtest_2.42.0          plotly_4.9.2             xtable_1.8-4             jsonlite_1.6.1           R6_2.4.1                 TFisher_0.2.0            pillar_1.4.3             htmltools_0.4.0          glue_1.3.1               BiocNeighbors_1.4.2      codetools_0.2-16         tsne_0.1-3               mvtnorm_1.1-0            lattice_0.20-40          numDeriv_2016.8-1.1      curl_4.3                 ggbeeswarm_0.6.0        
## [142] leiden_0.3.3             gtools_3.8.1             magick_2.3               openssl_1.4.1            survival_3.1-8           limma_3.42.2             rmarkdown_2.1            munsell_0.5.0            GenomeInfoDbData_1.2.2   haven_2.2.0              reshape2_1.4.3           gtable_0.3.0